# Load the board-games dataset from CSV; readr guesses the column types.
# NOTE(review): the path is absolute, so it only resolves where this
# directory exists -- confirm before running elsewhere.
board_games <- readr::read_csv(
  file = "/cloud/project/data/board_games.csv"
)
## Rows: 10532 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): description, image, name, thumbnail, artist, category, compilation...
## dbl (10): game_id, max_players, max_playtime, min_age, min_players, min_play...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, mean, max) for `cars`.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Show the games ordered from the largest to the smallest `max_playtime`.
arrange(board_games, desc(max_playtime))
## # A tibble: 10,532 × 22
##    game_id description      image   max_players max_playtime min_age min_players
##      <dbl> <chr>            <chr>         <dbl>        <dbl>   <dbl>       <dbl>
##  1    4815 This is a war g… //cf.g…          10        60000      14           8
##  2   46669 (from GMT websi… //cf.g…           4        17280       0           2
##  3     254 (from the back … //cf.g…           7        12000      14           2
##  4    6942 Introduction:&#… //cf.g…           4        12000      12           2
##  5    1499 (from ADG websi… //cf.g…           7         6000      12           2
##  6    5622 Pacific War is … //cf.g…           2         6000      16           2
##  7   38578 Age of Muskets … //cf.g…           6         6000       0           2
##  8  173504 The Greatest Da… //cf.g…           8         6000      12           2
##  9    5651 The Longest Day… //cf.g…           8         5400      12           2
## 10   13532 (from the box:)… //cf.g…           2         4500      12           2
## # … with 10,522 more rows, and 15 more variables: min_playtime <dbl>,
## #   name <chr>, playing_time <dbl>, thumbnail <chr>, year_published <dbl>,
## #   artist <chr>, category <chr>, compilation <chr>, designer <chr>,
## #   expansion <chr>, family <chr>, mechanic <chr>, publisher <chr>,
## #   average_rating <dbl>, users_rated <dbl>
# Bucket every game into a labelled playtime range based on `playing_time`.
#
# `case_when()` returns the first matching branch, so each branch only needs
# its upper bound. This keeps the ranges contiguous: the earlier
# `>= 21 & <= 40` style left gaps, so a fractional value such as 20.5
# matched nothing and silently became NA.
# Values below 1 are labelled "N/A"; a missing `playing_time` stays NA.
board_games <- board_games %>%
  mutate(
    playtime_group = case_when(
      is.na(playing_time) ~ NA_character_,
      playing_time < 1 ~ "N/A",
      playing_time <= 20 ~ "Under 20 minutes",
      playing_time <= 40 ~ "20-40 minutes",
      playing_time <= 60 ~ "40-60 minutes",
      playing_time <= 120 ~ "1-2 hours",
      playing_time <= 180 ~ "2-3 hours",
      playing_time <= 240 ~ "3-4 hours",
      playing_time <= 360 ~ "4-6 hours",
      TRUE ~ "Over 6 hours"
    )
  )
# Tally the games in each playtime bucket, largest bucket first.
count(board_games, playtime_group, sort = TRUE)
## # A tibble: 9 × 2
##   playtime_group       n
##   <chr>            <int>
## 1 40-60 minutes     2819
## 2 20-40 minutes     2210
## 3 Under 20 minutes  1972
## 4 1-2 hours         1896
## 5 2-3 hours          528
## 6 N/A                350
## 7 3-4 hours          344
## 8 4-6 hours          312
## 9 Over 6 hours       101

#{r} board_games%>% filter(playing_time < 1000)%>% arrange(desc(playing_time)) #

Violin plot: average rating by playtime group

# Violin plot of `average_rating` for each playtime bucket.
#
# `playtime_group` is a character column, so ggplot would order the x axis
# alphabetically; converting it to a factor with explicit levels puts the
# buckets in order of increasing duration. The level strings must match the
# labels assigned when `playtime_group` was created.
board_games %>%
  mutate(
    playtime_group = factor(
      playtime_group,
      levels = c(
        "N/A",
        "Under 20 minutes",
        "20-40 minutes",
        "40-60 minutes",
        "1-2 hours",
        "2-3 hours",
        "3-4 hours",
        "4-6 hours",
        "Over 6 hours"
      )
    )
  ) %>%
  ggplot(aes(x = playtime_group, y = average_rating, fill = playtime_group)) +
  geom_violin() +
  labs(
    title = "Average rating by playtime group",
    fill = "Playtime Group",
    x = "Playtime group",
    y = "Average rating"
  ) +
  # Tilt the long bucket labels so they do not overlap.
  theme(axis.text.x = element_text(angle = -45, hjust = 0))

Research questions: Does this change with recommended ages? Does this change with different game categories?

library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(tidyverse)
library(heatmaply)
## 
## ======================
## Welcome to heatmaply version 1.3.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Heatmap Prep - Counts ------------------------------------------------------------

# fix filter stuff: 


# Split the comma-separated `category` string into four columns.
#
# Only the first four categories of a game are kept:
#   * extra = "drop"  -- any further categories are discarded on purpose;
#   * fill = "right"  -- games with fewer than four get NA in the rest.
# Both settings give the same values as the defaults, but without the
# per-row warnings the defaults emit.
categories <- board_games %>%
  select(category) %>%
  separate(
    category,
    into = c("category1", "category2", "category3", "category4"),
    sep = ",",
    extra = "drop",
    fill = "right"
  )
# Count how often each (category1, category2) pair occurs, using only rows
# where both categories are present; the most frequent pairs come first.
# Passing the columns to `count()` directly returns an ungrouped tibble, so
# no grouping carries over into later steps.
cat_counts <- categories %>%
  filter(!is.na(category1), !is.na(category2)) %>%
  count(category1, category2, sort = TRUE)

# Frequency of each `category1` value, most common first (NA values are
# counted as their own row). Returned ungrouped.
top_cat1 <- categories %>%
  count(category1, sort = TRUE)

# Frequency of each `category2` value, most common first (NA values are
# counted as their own row). Returned ungrouped.
top_cat2 <- categories %>%
  count(category2, sort = TRUE)

# Hard-coded set of 20 first-category labels.
# NOTE(review): presumably copied from the top rows of `top_cat1` -- confirm,
# and regenerate if the data changes.
top_cat1_list <- c(
  "Card Game",
  "Abstract Strategy",
  "Wargame",
  "Economic",
  "Adventure",
  "Bluffing",
  "Action / Dexterity",
  "Animals",
  "Dice",
  "Ancient",
  "Fantasy",
  "Children's Game",
  "City Building",
  "Party Game",
  "Deduction",
  "Aviation / Flight",
  "Medieval",
  "Fighting",
  "Napoleonic",
  "American Civil War"
)

# Hard-coded set of 20 second-category labels.
# NOTE(review): presumably copied from the top rows of `top_cat2` -- confirm.
# NOTE(review): the spacing and capitalisation of "Movies / TV/ Radio Theme"
# look irregular; check the string against the values in the data.
top_cat2_list <- c(
  "Wargame",
  "Card Game",
  "Fantasy",
  "World War II",
  "Children's Game",
  "Dice",
  "Fighting",
  "Economic",
  "Science Fiction",
  "Party Game",
  "Deduction",
  "Exploration",
  "Humor",
  "Medieval",
  "Collectible Components",
  "Racing",
  "Movies / TV/ Radio Theme",
  "Negotiation",
  "Miniatures",
  "Animals"
)

# Restrict the pair counts to the selected categories: `category1` must be in
# `top_cat1_list` and `category2` must be in `top_cat2_list`.
# The second test used to check `top_cat1_list` as well, which left
# `top_cat2_list` unused and filtered the second category on the wrong set.
map_counts <- cat_counts %>%
  filter(
    category1 %in% top_cat1_list,
    category2 %in% top_cat2_list
  )


# Heatmap Prep - Ratings --------------------------------------------------

# Same four-way category split as for the counts, but keeping each game's
# `average_rating` and `name` alongside the category columns.
#
# Only the first four categories of a game are kept:
#   * extra = "drop"  -- any further categories are discarded on purpose;
#   * fill = "right"  -- games with fewer than four get NA in the rest.
# Both settings give the same values as the defaults, but without the
# per-row warnings the defaults emit.
rat_categories <- board_games %>%
  select(category, average_rating, name) %>%
  separate(
    category,
    into = c("category1", "category2", "category3", "category4"),
    sep = ",",
    extra = "drop",
    fill = "right"
  )
# Mean rating and number of games for each (category1, category2) pair,
# using only rows where both categories are present; best-rated pairs first.
# `.groups = "drop"` returns an ungrouped tibble instead of one still grouped
# by `category1`, and avoids the regrouping message from `summarize()`.
rat_averages <- rat_categories %>%
  filter(!is.na(category1), !is.na(category2)) %>%
  group_by(category1, category2) %>%
  summarize(
    avg_rat = mean(average_rating),
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_rat))

Heatmap

# Six-step fill palette running from white to dark purple.
colors <- c(
  "#ffffff", "#f2f0f7", "#cbc9e2",
  "#9e9ac8", "#756bb1", "#54278f"
)

# Tile plot of the pair counts: `category2` across, `category1` down, and the
# tile fill binned from `n`. The plot is stored in `heatmap`; nothing is
# drawn by this statement.
heatmap <- ggplot(
  data = map_counts,
  mapping = aes(x = category2, y = category1, fill = n)
) +
  geom_tile() +
  scale_fill_stepsn(
    colors = colors,
    values = c(0, 0.01, 0.1, 0.25, 0.6, 1)
  )

Game Mechanics

# Gamesonyourphone_ %>% 
#   group_by(game_mechanic_pop) %>% 
#   filter(!is.na(game_mechanic_pop)) %>% 
#   summarize(count = n())%>% 
#   arrange(desc(count)) %>% 
#   mutate(proportion = (count/15)*100)
# Gamesonyourphone_ %>% 
#   group_by(game_mechanic_pop) %>% 
#   filter(!is.na(game_mechanic_pop)) %>% 
#   mutate(count = n(),
#          proportion = (count/15)*100) %>% 
# ggplot(mapping = aes(x = "", y = reorder(game_mechanic_pop, -proportion), fill = proportion, color = proportion)) +
#     geom_bar(stat = "identity") +
#     coord_polar(theta = "y", start = 0)+
#     labs(
#       title = "Bike rentals in DC",
#       subtitle = "by season",
#       x = "Season",
#       y = "Number of bikes rented")